home *** CD-ROM | disk | FTP | other *** search
/ Amiga Format CD 41 / Amiga Format CD41 (1999-06)(Future Publishing)(GB)[!][issue 1999-07].iso / -seriously_amiga- / programming / other / ppc_c2p / ppc_c2p_8bit.s < prev    next >
Text File  |  1999-04-19  |  5KB  |  213 lines

  1. # 8bit c2p converter. 
  2. # written by Jacek Cybularczyk (aka Noe / Venus Art)
  3.  
  4.                 .include macros.i
  5.  
  6.  
  7. ##############################################################################
  8. # C2P converter for non-interleaved planes
  9. # Optimized for two Integer Units (like in MPC604)
  10. #
  11. # IN:
  12. # r3    pointer to chunky buffer (allocated on 32-byte boundary for better performance)
  13. # r4    pointer to table of plane pointers (planes[8])
  14. # r5    width (aligned to 64 pixels)
  15. # r6    height
  16. # OUT:
  17. # none
  18.  
  19.         .extern    _C2P_NI
  20.         .extern    C2P_NI
  21.  
  22.         .align    4
  23.  
  24. _C2P_NI:
  25. C2P_NI:
  26.         subi    r1,r1,(32-10)*4
  27.         stw    r2,0(r1)
  28.         stmw    r11,4(r1)
  29.  
  30.         srwi    r5,r5,6
  31.         mullw    r31,r5,r6
  32.         mtctr    r31
  33.  
  34.         .set    plane0,r4
  35.         .set    plane1,r5
  36.         .set    plane2,r6
  37.         .set    plane3,r7
  38.         .set    plane4,r8
  39.         .set    plane5,r9
  40.         .set    plane6,r10
  41.         .set    plane7,r31
  42.  
  43.         lwz    plane7,28(r4)
  44.         subi    plane7,plane7,4
  45.         lwz    plane6,24(r4)
  46.         subi    plane6,plane6,4
  47.         lwz    plane5,20(r4)
  48.         subi    plane5,plane5,4
  49.         lwz    plane4,16(r4)
  50.         subi    plane4,plane4,4
  51.         lwz    plane3,12(r4)
  52.         subi    plane3,plane3,4
  53.         lwz    plane2,8(r4)
  54.         subi    plane2,plane2,4
  55.         lwz    plane1,4(r4)
  56.         subi    plane1,plane1,4
  57.         lwz    plane0,0(r4)
  58.         subi    plane0,plane0,4
  59.  
  60.         .set    src,r3
  61.         .set    temp0,r30
  62.         .set    temp1,r29
  63.  
  64.         dcbt    0,(src)            # pre-fill cache line
  65.  
  66.         .set    mask1,r28
  67.         .set    mask2,r27
  68.         .set    mask4,r26
  69.  
  70.         addis    mask1,0,0x5555
  71.         ori    mask1,mask1,0x5555
  72.  
  73.         addis    mask2,0,0x3333
  74.         ori    mask2,mask2,0x3333
  75.  
  76.         addis    mask4,0,0x0f0f
  77.         ori    mask4,mask4,0x0f0f
  78.  
  79.         .set    reg00,r25
  80.         .set    reg01,r24
  81.         .set    reg02,r23
  82.         .set    reg03,r22
  83.         .set    reg04,r21
  84.         .set    reg05,r20
  85.         .set    reg06,r19
  86.         .set    reg07,r18
  87.         .set    reg10,r17
  88.         .set    reg11,r16
  89.         .set    reg12,r15
  90.         .set    reg13,r14
  91.         .set    reg14,r13
  92.         .set    reg15,r12
  93.         .set    reg16,r11
  94.         .set    reg17,r2
  95.  
  96.         lwz    reg00,0(src)
  97.         lwz    reg04,4(src)
  98.         lwz    reg01,8(src)
  99.         lwz    reg05,12(src)
  100.         lwz    reg02,16(src)
  101.         lwz    reg06,20(src)
  102.         lwz    reg03,24(src)
  103.         lwz    reg07,28(src)
  104.  
  105.         addi    src,src,32
  106.         dcbt    0,src            # fill cache line
  107.  
  108.         MERGE_16BITS2    reg00,reg02,temp0,reg01,reg03,temp1
  109.         MERGE_16BITS2    reg04,reg06,temp0,reg05,reg07,temp1
  110.         lwz    reg10,0(src)
  111.         MERGE_8BITS2    reg00,reg01,temp0,reg02,reg03,temp1
  112.         lwz    reg14,4(src)
  113.         MERGE_8BITS2    reg04,reg05,temp0,reg06,reg07,temp1
  114.         lwz    reg11,8(src)
  115.         MERGE_nBITS2    reg00,reg04,temp0,reg01,reg05,temp1,mask4,4
  116.         lwz    reg15,12(src)
  117.         MERGE_nBITS2    reg02,reg06,temp0,reg03,reg07,temp1,mask4,4
  118.         lwz    reg12,16(src)
  119.         MERGE_nBITS2    reg00,reg02,temp0,reg01,reg03,temp1,mask2,2
  120.         lwz    reg16,20(src)
  121.         MERGE_nBITS2    reg04,reg06,temp0,reg05,reg07,temp1,mask2,2
  122.         lwz    reg13,24(src)
  123.         MERGE_nBITS2    reg00,reg01,temp0,reg02,reg03,temp1,mask1,1
  124.         lwz    reg17,28(src)
  125.         MERGE_nBITS2    reg04,reg05,temp0,reg06,reg07,temp1,mask1,1
  126.         addi    src,src,32
  127.         dcbt    0,src            # fill cache line
  128.  
  129.         b    C2P_NI_mid
  130.  
  131. C2P_NI_loop:
  132.         stwu    reg10,4(plane7)
  133.         MERGE_16BITS2    reg00,reg02,temp0,reg01,reg03,temp1
  134.         MERGE_16BITS2    reg04,reg06,temp0,reg05,reg07,temp1
  135.         lwz    reg10,0(src)
  136.         stwu    reg14,4(plane3)
  137.         MERGE_8BITS2    reg00,reg01,temp0,reg02,reg03,temp1
  138.         lwz    reg14,4(src)
  139.         stwu    reg11,4(plane6)
  140.         MERGE_8BITS2    reg04,reg05,temp0,reg06,reg07,temp1
  141.         lwz    reg11,8(src)
  142.         stwu    reg15,4(plane2)
  143.         MERGE_nBITS2    reg00,reg04,temp0,reg01,reg05,temp1,mask4,4
  144.         lwz    reg15,12(src)
  145.         stwu    reg12,4(plane5)
  146.         MERGE_nBITS2    reg02,reg06,temp0,reg03,reg07,temp1,mask4,4
  147.         lwz    reg12,16(src)
  148.         stwu    reg16,4(plane1)
  149.         MERGE_nBITS2    reg00,reg02,temp0,reg01,reg03,temp1,mask2,2
  150.         lwz    reg16,20(src)
  151.         stwu    reg13,4(plane4)
  152.         MERGE_nBITS2    reg04,reg06,temp0,reg05,reg07,temp1,mask2,2
  153.         lwz    reg13,24(src)
  154.         stwu    reg17,4(plane0)
  155.         MERGE_nBITS2    reg00,reg01,temp0,reg02,reg03,temp1,mask1,1
  156.         lwz    reg17,28(src)
  157.         addi    src,src,32
  158.         dcbt    0,src            # fill cache line
  159.         MERGE_nBITS2    reg04,reg05,temp0,reg06,reg07,temp1,mask1,1
  160.  
  161. C2P_NI_mid:
  162.         stwu    reg00,4(plane7)
  163.         MERGE_16BITS2    reg10,reg12,temp0,reg11,reg13,temp1
  164.         MERGE_16BITS2    reg14,reg16,temp0,reg15,reg17,temp1
  165.         lwz    reg00,0(src)
  166.         stwu    reg04,4(plane3)
  167.         MERGE_8BITS2    reg10,reg11,temp0,reg12,reg13,temp1
  168.         lwz    reg04,4(src)
  169.         stwu    reg01,4(plane6)
  170.         MERGE_8BITS2    reg14,reg15,temp0,reg16,reg17,temp1
  171.         lwz    reg01,8(src)
  172.         stwu    reg05,4(plane2)
  173.         MERGE_nBITS2    reg10,reg14,temp0,reg11,reg15,temp1,mask4,4
  174.         lwz    reg05,12(src)
  175.         stwu    reg02,4(plane5)
  176.         MERGE_nBITS2    reg12,reg16,temp0,reg13,reg17,temp1,mask4,4
  177.         lwz    reg02,16(src)
  178.         stwu    reg06,4(plane1)
  179.         MERGE_nBITS2    reg10,reg12,temp0,reg11,reg13,temp1,mask2,2
  180.         lwz    reg06,20(src)
  181.         stwu    reg03,4(plane4)
  182.         MERGE_nBITS2    reg14,reg16,temp0,reg15,reg17,temp1,mask2,2
  183.         lwz    reg03,24(src)
  184.         stwu    reg07,4(plane0)
  185.         MERGE_nBITS2    reg10,reg11,temp0,reg12,reg13,temp1,mask1,1
  186.         lwz    reg07,28(src)
  187.         addi    src,src,32
  188.         dcbt    0,src            # fill cache line
  189.         MERGE_nBITS2    reg14,reg15,temp0,reg16,reg17,temp1,mask1,1
  190.  
  191.         bdnz    C2P_NI_loop
  192.  
  193.         stwu    reg10,4(plane7)
  194.         stwu    reg11,4(plane6)
  195.         stwu    reg12,4(plane5)
  196.         stwu    reg13,4(plane4)
  197.         stwu    reg14,4(plane3)
  198.         stwu    reg15,4(plane2)
  199.         stwu    reg16,4(plane1)
  200.         stwu    reg17,4(plane0)
  201.  
  202.         lwz    r2,0(r1)
  203.         lmw    r11,4(r1)
  204.         addi    r1,r1,(32-10)*4
  205.  
  206.         blr
  207.  
  208.         .type    C2P_NI,@function
  209.         .size    C2P_NI,$-C2P_NI
  210.  
  211.  
  212. ##############################################################################
  213.